)
),
degree_cat_women = factor(
degree_cat_women,
levels = c(
paste0(1:8)
),
labels = c(
"0-1",
"2-5",
"6-10",
"11-15",
"16-20",
"21-25",
"26-30",
">30"
)
)
)
plot_adj_mat_count_both <- plyr::ddply(
plot_adj_mat,
.(age_category, sex, degree_cat),
summarize,
count = n()
)
plot_adj_mat_count_both_age <- plyr::ddply(
plot_adj_mat,
.(sex),
summarize,
age_count = n()
) %>% mutate(
sex = ifelse(sex == "MALE", "Men", "Women")
)
plot_adj_mat_count_women <- plyr::ddply(
plot_adj_mat,
.(age_category, sex, degree_cat_women),
summarize,
count_women = n()
)
plot_adj_mat_count <- data.frame(
age_category = sort(rep(unique(plot_adj_mat$age_category), 2)),
sex          = rep(c("MALE", "FEMALE"), 12)
)
plot_adj_mat_count_list <- list()
for (i in 1:8)
{
plot_adj_mat_count$degree <- i
plot_adj_mat_count_list[[length(plot_adj_mat_count_list) + 1]] <- plot_adj_mat_count
}
plot_adj_mat_count <- do.call(bind_rows, plot_adj_mat_count_list)
plot_adj_mat_count <- mutate(
plot_adj_mat_count,
degree = factor(
degree,
levels = c(
paste0(1:8)
),
labels = c(
"0-1",
"2-5",
"6-10",
"11-15",
"16-20",
"21-25",
"26-30",
">30"
)
)
) %>% left_join(
plot_adj_mat_count_both,
by = c(
"age_category", "sex", "degree" = "degree_cat"
)
) %>% left_join(
plot_adj_mat_count_women,
by = c(
"age_category", "sex", "degree" = "degree_cat_women"
)
) %>% mutate(
count = ifelse(is.na(count), 0, count),
count_women = ifelse(is.na(count_women), 0, count_women),
sex = ifelse(sex == "MALE", "Men", "Women")
) %>% reshape2::melt(
id = c(
"age_category", "sex", "degree"
)
) %>% mutate(
variable = ifelse(variable == "count", "All Co-Participants", "Women Co-Participants")
) %>% left_join(
plot_adj_mat_count_both_age
) %>% mutate(
value = value/age_count
)
# Define breaks and labels for 10 levels
ggplot(plot_adj_mat_count, aes(x = age_category, y = degree, fill = value)) +
geom_tile(color = "white",
lwd = 1,
linetype = 1) +
facet_grid(variable ~ sex) +
labs(x = "Age", y = "Number of co-participants", fill = "Proportion of co-\nparticipants by age") +
scale_fill_viridis_c(option = "F")  +
scale_x_discrete(breaks = function(x){x[c(TRUE, FALSE)]}) +
theme_bw() +
theme(legend.position = "bottom",
axis.text.y = element_text(size = 14),
axis.text.x = element_text(size = 14),
legend.text = element_text(size = 14),
legend.title = element_text(size = 18),
strip.text = element_text(size = 18),
axis.title = element_text(size = 18),
legend.spacing.x = unit(2.0, 'cm')) +
guides(fill = guide_colorbar(frame.colour = "black",
title.vjust = 1.5,
barwidth = 10,
barheight = 1.5))
ggsave("rwandan_women/plot/age_coparts_sex.png", width = 10, height = 9, units = "in")
## Set up the data for the analysis
## Did the commit a violence offense
data.for.analysis$Offense_type <- 0
data.for.analysis$Offense_type[data.for.analysis$cat1_G > 0 | data.for.analysis$cat2_G > 0] <- 1
## Drop people found not guilty
data_num <- data.for.analysis$cat1_G + data.for.analysis$cat2_G + data.for.analysis$cat3_G
data.for.analysis <- subset(data.for.analysis, data_num > 0)
mean(data.for.analysis$age, na.rm = T)
pred_list_1 <- list()
pred_list_2 <- list()
for (i in 1:300)
{
gc()
m1 <- readRDS(paste0("rwandan_women/model_output/boot_output/m1_sim_", i, ".rds"))
m2 <- readRDS(paste0("rwandan_women/model_output/boot_output/m2_sim_", i, ".rds"))
pred_df_logistic <- data.frame(
age_cat = sort(rep(-3:4, 4)),
age_cat_sqr = sort(rep((-3:4)^2, 4)),
sex = rep(c(rep("MALE", 2), rep("FEMALE", 2)), 7),
white_collar_dummy = rep(0:1, 14),
farmer_dummy = 0,
rwandan_dummy = 0
)
pred_stage_1_m2 <- predict(m1, type = "zero", newdata = pred_df_logistic, se.fit = T)
pred_stage_2_m2 <- predict(m1, type = "count", newdata = pred_df_logistic, se.fit = T)
pred_df_logistic$stage_1 <- pred_stage_1_m2
pred_df_logistic$stage_2 <- pred_stage_2_m2
pred_df_logistic$sex <- ifelse(pred_df_logistic$sex == "FEMALE", "WOMEN", "MEN")
pred_df_logistic$white_collar_dummy <- ifelse(pred_df_logistic$white_collar_dummy == 1, "White collar", "Not white collar")
pred_list_1[[i]] <- bind_rows(
dplyr::select(
pred_df_logistic,
c(
age_cat, stage_1, sex, white_collar_dummy
)
) %>% dplyr::rename(
pred = stage_1
) %>% mutate(
stage = "Stage 1 (logistic regression)"
),
dplyr::select(
pred_df_logistic,
c(
age_cat, stage_2, sex, white_collar_dummy
)
) %>% dplyr::rename(
pred = stage_2
) %>% mutate(
stage = "Stage 2 (Poisson regression)"
)
) %>% mutate(
boot = i
)
pred_df_logistic <- data.frame(
age_cat = sort(rep(2:8, 4)),
age_cat_sqr = sort(rep((2:8)^2, 4)),
sex = rep(c(rep("MALE", 2), rep("FEMALE", 2)), 7),
white_collar_dummy = rep(0:1, 14),
farmer_dummy = 0,
rwandan_dummy = 0
)
pred_stage_1_m2 <- predict(m2, type = "zero", newdata = pred_df_logistic, se.fit = T)
pred_stage_2_m2 <- predict(m2, type = "count", newdata = pred_df_logistic, se.fit = T)
pred_df_logistic$stage_1 <- pred_stage_1_m2
pred_df_logistic$stage_2 <- pred_stage_2_m2
pred_df_logistic$sex <- ifelse(pred_df_logistic$sex == "FEMALE", "WOMEN", "MEN")
pred_df_logistic$white_collar_dummy <- ifelse(pred_df_logistic$white_collar_dummy == 1, "White collar", "Not white collar")
pred_list_2[[i]] <- bind_rows(
dplyr::select(
pred_df_logistic,
c(
age_cat, stage_1, sex, white_collar_dummy
)
) %>% dplyr::rename(
pred = stage_1
) %>% mutate(
stage = "Stage 1 (logistic regression)"
),
dplyr::select(
pred_df_logistic,
c(
age_cat, stage_2, sex, white_collar_dummy
)
) %>% dplyr::rename(
pred = stage_2
) %>% mutate(
stage = "Stage 2 (Poisson regression)"
)
) %>% mutate(
boot = i
)
cat(paste0("\rPct done: ", round(i/300, 2), "."))
}
saveRDS(pred_list_2, file = "rwandan_women/model_output/boot_output/pred_list_2.rds")
pred_list_1 <- readRDS("rwandan_women/model_output/boot_output/pred_list_1.rds")
pred_list_2 <- readRDS("rwandan_women/model_output/boot_output/pred_list_2.rds")
pred_1 <- do.call(bind_rows, pred_list_1)
pred_2 <- do.call(bind_rows, pred_list_2)
## Plot the predictions
pred_1$sex <- factor(
pred_1$sex,
levels = c("MEN", "WOMEN"),
labels = c("Men", "Women")
)
pred_2$sex <- factor(
pred_2$sex,
levels = c("MEN", "WOMEN"),
labels = c("Men", "Women")
)
pred_1$outcome <- "All Coparticipants"
pred_2$outcome <- "Women Coparticipants"
pred_1_summary$outcome <- "All Coparticipants"
pred_2_summary$outcome <- "Women Coparticipants"
pred_2$label <- ifelse(grepl("1", pred_2$stage), paste0(pred_2$outcome, " Stage 1 (logistic regression)"), paste0(pred_2$outcome, " Stage 2 (Poisson regression)"))
pred_1_summary <- plyr::ddply(
pred_1,
.(age_cat, white_collar_dummy, sex, label, stage),
summarize,
mean = mean(pred),
lb = quantile(pred, 0.025),
ub = quantile(pred, 0.975)
)
pred_1_summary <- data.frame(pred_1_summary)
pred_2_summary <- data.frame(pred_2_summary)
pred_1$lb <- as.numeric(NA)
pred_1$ub <- as.numeric(NA)
pred_1$mean <- as.numeric(NA)
pred_2$lb <- as.numeric(NA)
pred_2$ub <- as.numeric(NA)
pred_2$mean <- as.numeric(NA)
models_1_2 <- bind_rows(
pred_1_summary,
pred_2_summary
) %>% filter(
white_collar_dummy == "Not white collar"
) %>% mutate(
label = factor(
label,
levels = c(
"All Coparticipants Stage 1 (logistic regression)",
"Women Coparticipants Stage 1 (logistic regression)",
"All Coparticipants Stage 2 (Poisson regression)",
"Women Coparticipants Stage 2 (Poisson regression)"
),
labels = c(
"All Coparticipants\nStage 1 (Participation in violence)",
"Women Coparticipants\nStage 1 (Participation in violence)",
"All Coparticipants\nStage 2 (Influence in militias)",
"Women Coparticipants\nStage 2 (Influence in militias)"
)
)
)
3:4
-3:4
pred_list_1 <- list()
pred_list_2 <- list()
for (i in 1:300)
{
gc()
m1 <- readRDS(paste0("rwandan_women/model_output/boot_output/m1_sim_", i, ".rds"))
m2 <- readRDS(paste0("rwandan_women/model_output/boot_output/m2_sim_", i, ".rds"))
pred_df_logistic <- data.frame(
age_cat = sort(rep(-3:3, 4)),
age_cat_sqr = sort(rep((-3:3)^2, 4)),
sex = rep(c(rep("MALE", 2), rep("FEMALE", 2)), 7),
white_collar_dummy = rep(0:1, 14),
farmer_dummy = 0,
rwandan_dummy = 0
)
pred_stage_1_m2 <- predict(m1, type = "zero", newdata = pred_df_logistic, se.fit = T)
pred_stage_2_m2 <- predict(m1, type = "count", newdata = pred_df_logistic, se.fit = T)
pred_df_logistic$stage_1 <- pred_stage_1_m2
pred_df_logistic$stage_2 <- pred_stage_2_m2
pred_df_logistic$sex <- ifelse(pred_df_logistic$sex == "FEMALE", "WOMEN", "MEN")
pred_df_logistic$white_collar_dummy <- ifelse(pred_df_logistic$white_collar_dummy == 1, "White collar", "Not white collar")
pred_list_1[[i]] <- bind_rows(
dplyr::select(
pred_df_logistic,
c(
age_cat, stage_1, sex, white_collar_dummy
)
) %>% dplyr::rename(
pred = stage_1
) %>% mutate(
stage = "Stage 1 (logistic regression)"
),
dplyr::select(
pred_df_logistic,
c(
age_cat, stage_2, sex, white_collar_dummy
)
) %>% dplyr::rename(
pred = stage_2
) %>% mutate(
stage = "Stage 2 (Poisson regression)"
)
) %>% mutate(
boot = i
)
pred_df_logistic <- data.frame(
age_cat = sort(rep(-3:3, 4)),
age_cat_sqr = sort(rep((-3:3)^2, 4)),
sex = rep(c(rep("MALE", 2), rep("FEMALE", 2)), 7),
white_collar_dummy = rep(0:1, 14),
farmer_dummy = 0,
rwandan_dummy = 0
)
pred_stage_1_m2 <- predict(m2, type = "zero", newdata = pred_df_logistic, se.fit = T)
pred_stage_2_m2 <- predict(m2, type = "count", newdata = pred_df_logistic, se.fit = T)
pred_df_logistic$stage_1 <- pred_stage_1_m2
pred_df_logistic$stage_2 <- pred_stage_2_m2
pred_df_logistic$sex <- ifelse(pred_df_logistic$sex == "FEMALE", "WOMEN", "MEN")
pred_df_logistic$white_collar_dummy <- ifelse(pred_df_logistic$white_collar_dummy == 1, "White collar", "Not white collar")
pred_list_2[[i]] <- bind_rows(
dplyr::select(
pred_df_logistic,
c(
age_cat, stage_1, sex, white_collar_dummy
)
) %>% dplyr::rename(
pred = stage_1
) %>% mutate(
stage = "Stage 1 (logistic regression)"
),
dplyr::select(
pred_df_logistic,
c(
age_cat, stage_2, sex, white_collar_dummy
)
) %>% dplyr::rename(
pred = stage_2
) %>% mutate(
stage = "Stage 2 (Poisson regression)"
)
) %>% mutate(
boot = i
)
cat(paste0("\rPct done: ", round(i/300, 2), "."))
}
pred_values <- readRDS("C:/Users/jxe220000/Dropbox/cooperative_regimes/data_processing/model_output/pred_values.rds")
View(pred_values)
# ----------------------------- #
#    Master replication file    #
# ----------------------------- #
# Edgerton (2023) JOP
# R version 4.2.2 (2022-10-31 ucrt)
# Platform: x86_64-w64-mingw32/x64 (64-bit)
# Running under: Windows 10 x64 (build 22621)
#
# Matrix products: default
#
# locale:
#   [1] LC_COLLATE=English_United States.utf8
# [2] LC_CTYPE=English_United States.utf8
# [3] LC_MONETARY=English_United States.utf8
# [4] LC_NUMERIC=C
# [5] LC_TIME=English_United States.utf8
#
# attached base packages:
#   [1] parallel  stats     graphics  grDevices utils
# [6] datasets  methods   base
#
# other attached packages:
#   [1] glmnet_4.1-8        missForest_1.5
# [3] Metrics_0.1.4       precrec_0.14.2
# [5] speedglm_0.3-5      biglm_0.9-2.1
# [7] DBI_1.1.3           caret_6.0-94
# [9] lattice_0.21-8      tseries_0.10-53
# [11] forecast_8.21       vars_1.5-9
# [13] lmtest_0.9-40       urca_1.3-3
# [15] strucchange_1.5-3   sandwich_3.0-2
# [17] zoo_1.8-10          MASS_7.3-58.3
# [19] doFuture_1.0.0      future_1.32.0
# [21] foreach_1.5.2       cowplot_1.1.1
# [23] assortnet_0.20      plotROC_2.3.0
# [25] ggplot2_3.4.2       tidyr_1.2.0
# [27] pROC_1.18.0         optimx_2022-4.30
# [29] fixest_0.10.4       ClusterR_1.3.0
# [31] cluster_2.1.4       tibble_3.2.1
# [33] reshape2_1.4.4      intergraph_2.0-2
# [35] purrr_1.0.2         tidygraph_1.2.3
# [37] alpaca_0.3.4        network_1.18.1
# [39] readxl_1.4.1        haven_2.5.0
# [41] data.table_1.14.2   btergm_1.10.6
# [43] brms_2.19.0         Rcpp_1.0.9
# [45] rstudioapi_0.15.0   texreg_1.38.6
# [47] countrycode_1.4.0   igoR_0.1.4
# [49] lme4_1.1-32         Matrix_1.5-4
# [51] peacesciencer_1.1.0 igraph_1.3.4
# [53] dplyr_1.1.1         plyr_1.8.7
#
# loaded via a namespace (and not attached):
#   [1] utf8_1.2.2              tidyselect_1.2.0
# [3] htmlwidgets_1.6.2       grid_4.2.2
# [5] gmp_0.7-1               munsell_0.5.0
# [7] codetools_0.2-19        DT_0.28
# [9] miniUI_0.1.1.1          withr_2.5.0
# [11] Brobdingnag_1.2-9       colorspace_2.1-0
# [13] lpSolveAPI_5.5.2.0-17.9 knitr_1.43
# [15] stats4_4.2.2            ROCR_1.0-11
# [17] robustbase_0.95-1       stevemisc_1.6.0
# [19] TTR_0.24.3              bayesplot_1.10.0
# [21] listenv_0.9.0           Rdpack_2.5
# [23] rstan_2.26.22           farver_2.1.1
# [25] bridgesampling_1.1-2    coda_0.19-4
# [27] parallelly_1.36.0       vctrs_0.6.3
# [29] generics_0.1.3          ipred_0.9-14
# [31] xfun_0.40               itertools_0.1-3
# [33] randomForest_4.7-1.1    R6_2.5.1
# [35] markdown_1.8            arm_1.13-1
# [37] cachem_1.0.7            promises_1.2.0.1
# [39] scales_1.2.1            nnet_7.3-19
# [41] gtable_0.3.4            globals_0.16.2
# [43] processx_3.8.0          timeDate_4022.108
# [45] rlang_1.1.1             splines_4.2.2
# [47] ModelMetrics_1.2.2.2    trust_0.1-8
# [49] checkmate_2.1.0         inline_0.3.19
# [51] abind_1.4-5             threejs_0.3.3
# [53] crosstalk_1.2.0         backports_1.4.1
# [55] quantmod_0.4.25         httpuv_1.6.9
# [57] lava_1.7.2.1            tensorA_0.36.2
# [59] tools_4.2.2             statnet.common_4.8.0
# [61] ellipsis_0.3.2          posterior_1.4.1
# [63] base64enc_0.1-3         ps_1.7.4
# [65] prettyunits_1.1.1       rpart_4.1.19
# [67] dreamerr_1.3.0          fracdiff_1.5-2
# [69] magrittr_2.0.3          ergm_4.4.0
# [71] sna_2.7-1               colourpicker_1.3.0
# [73] mvtnorm_1.1-3           matrixStats_0.63.0
# [75] hms_1.1.3               shinyjs_2.1.0
# [77] mime_0.12               evaluate_0.21
# [79] xtable_1.8-4            shinystan_2.6.0
# [81] shape_1.4.6             gridExtra_2.3
# [83] rstantools_2.3.1.1      compiler_4.2.2
# [85] V8_4.3.3                crayon_1.5.2
# [87] minqa_1.2.5             StanHeaders_2.26.27
# [89] htmltools_0.5.5         later_1.3.0
# [91] Formula_1.2-5           RcppParallel_5.1.7
# [93] lubridate_1.8.0         boot_1.3-28.1
# [95] cli_3.6.1               quadprog_1.5-8
# [97] rbibutils_2.2.13        gower_1.0.1
# [99] forcats_1.0.0           pkgconfig_2.0.3
# [101] geosphere_1.5-18        numDeriv_2016.8-1.1
# [103] sp_1.6-0                recipes_1.0.7
# [105] dygraphs_1.1.1.6        hardhat_1.3.0
# [107] rngtools_1.5.2          prodlim_2023.03.31
# [109] doRNG_1.8.6             stringr_1.5.0
# [111] distributional_0.3.2    callr_3.7.3
# [113] digest_0.6.29           rle_0.9.2
# [115] rmarkdown_2.24          cellranger_1.1.0
# [117] curl_5.0.0              shiny_1.7.5
# [119] gtools_3.9.4            nloptr_2.0.3
# [121] lifecycle_1.0.3         nlme_3.1-162
# [123] jsonlite_1.8.7          fansi_1.0.3
# [125] labelled_2.12.0         pillar_1.9.0
# [127] loo_2.6.0               survival_3.5-5
# [129] fastmap_1.1.1           httr_1.4.7
# [131] DEoptimR_1.1-1          pkgbuild_1.4.2
# [133] glue_1.6.2              xts_0.13.0
# [135] shinythemes_1.2.0       iterators_1.0.14
# [137] class_7.3-21            stringi_1.7.8
# [139] memoise_2.0.1           future.apply_1.11.0
## Clear working space
rm(list = ls())
## Check packages
packages <- c('plyr', 'dplyr', 'igraph', "peacesciencer",
'lme4', 'igoR', "countrycode", 'texreg', 'rstudioapi',
'brms', "btergm", "stats", "data.table",
'haven', 'readxl', 'Matrix', 'network', 'alpaca',
'tidygraph', 'purrr', 'intergraph', 'reshape2',
'network', 'tibble', 'cluster', 'ClusterR',
'fixest', 'optimx', 'pROC', 'tidyr', 'parallel',
'plotROC', 'ggplot2', 'texreg', 'plotROC', 'assortnet',
'cowplot', 'doFuture', 'parallel', 'fixest',
'vars', 'forecast', 'tseries', 'caret', 'speedglm',
'precrec', 'Metrics', 'missForest', 'tidyr', 'glmnet')
# renv::restore()
## Load missing packages
if (length(setdiff(packages, rownames(installed.packages()))) != 0)
{
install.packages(setdiff(packages, rownames(installed.packages())))
}
lapply(packages, library, character.only = T)
## Set your personal working directory
working_directory <- "cooperative_regimes/data_processing"
if (!grepl(working_directory, getwd()))
{
setwd(working_directory)
}
## 14. Data viz and tables
suppressMessages({
suppressWarnings({
source("14_data_visualization_tables_replication.r")
})
})
gc()
